Drive BC Network Analysis

Author

Robert Yacovelli

Data Loading and Initial Cleaning

suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(readr))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(lubridate))
suppressPackageStartupMessages(library(stringr))
# Function to standardize attribute names and handle event type variations
standardize_name <- function(x) {
  x <- x %>%
    tolower() %>%
    gsub("_", "", .) %>%
    gsub(" ", "", .) %>%
    trimws()
}

# Function to load and clean data for a given year
load_and_clean_data <- function(file_path) {
  data <- read_csv(file_path, show_col_types = FALSE)
  
  # Check if this is pre-2018 or post-2018 format
  if("cause" %in% names(data)) {
    # Pre-2018 format
    data <- data %>%
      rename(
        EVENT_TYPE = type,
        AREA_NAME = district,
        SEVERITY = severity,
        START_DATETIME = localupdatetime
      )
  }
  
  # Standardize values and convert types
  data <- data %>%
    mutate(
      EVENT_TYPE = standardize_name(EVENT_TYPE),
      AREA_NAME = standardize_name(AREA_NAME),
      SEVERITY = standardize_name(SEVERITY),
      START_DATETIME = parse_date_time(START_DATETIME,
                                       orders = c("ymd HMS", "mdy HMS", "dmy HMS",
                                                  "ymd HM", "mdy HM", "dmy HM",
                                                  "ymd", "mdy", "dmy"))
    ) %>%
    filter(!is.na(EVENT_TYPE)) %>%        # Remove NA event types
    filter(!EVENT_TYPE %in% c("planned"))  # Additional removal if needed
  
  # Select only the columns we need
  data <- data %>%
    select(EVENT_TYPE, AREA_NAME, SEVERITY, START_DATETIME) %>%
    drop_na(START_DATETIME)
  
  return(data)
}
# all the years apply the cleaning functions
data_files <- list.files("../data", pattern = "drivebceventshist.*\\.csv", full.names = TRUE)
all_data <- lapply(data_files, load_and_clean_data)

# Combine all data into a single dataframe
drivebc_data <- bind_rows(all_data)

# Convert columns to appropriate data types
drivebc_data <- drivebc_data %>%
  mutate(
    EVENT_TYPE = as.factor(EVENT_TYPE),
    AREA_NAME = as.factor(AREA_NAME),
    SEVERITY = as.factor(SEVERITY)
  )

str(drivebc_data)
tibble [2,748,950 × 4] (S3: tbl_df/tbl/data.frame)
 $ EVENT_TYPE    : Factor w/ 7 levels "construction",..: 2 2 2 2 2 2 2 2 2 2 ...
 $ AREA_NAME     : Factor w/ 13 levels "bulkleystikinedistrict",..: 4 4 4 4 13 9 9 13 8 8 ...
 $ SEVERITY      : Factor w/ 3 levels "major","minor",..: 3 3 3 3 3 3 3 3 3 3 ...
 $ START_DATETIME: POSIXct[1:2748950], format: "2006-01-03 08:06:50" "2006-02-13 16:03:49" ...
head(drivebc_data)

Time/Event Bipartite Graphs

create_bipartite_graph <- function(year_data, year) {
  area_names <- unique(year_data$AREA_NAME)
  active_events <- unique(year_data$EVENT_TYPE)
  
  # areas and events as nodes
  nodes <- data.frame(
    name = c(as.character(area_names), as.character(active_events)),
    type = c(rep(TRUE, length(area_names)), 
             rep(FALSE, length(active_events)))
  )
  
  # edge exists if a particular type of event occurred in a particular area
  edges <- year_data %>%
    select(AREA_NAME, EVENT_TYPE) %>%
    distinct()

  g <- graph_from_data_frame(d = edges, vertices = nodes, directed = FALSE)
  
  # vertex attributes
  V(g)$color <- ifelse(V(g)$type, "lightblue", "lightgreen")
  V(g)$shape <- ifelse(V(g)$type, "circle", "square")
  V(g)$size <- 4 
  V(g)$label.cex <- 0.7
  V(g)$label.dist <- .75
  V(g)$label.degree <- ifelse(seq_along(V(g)) %% 2 == 0, pi/2, -pi/2)
  
  return(g)
}
# graphs for each year
years <- 2006:2023

for(year in years) {
  year_data <- drivebc_data %>%
    filter(year(START_DATETIME) == year)
  
  if(nrow(year_data) == 0) {
    plot.new()
    title(main = paste("No Data Available for", year))
    next
  }
  
  g <- create_bipartite_graph(year_data, year)

  plot(g, 
      layout = layout_as_bipartite,
      vertex.label = V(g)$name,
      vertex.color = V(g)$color,
      vertex.shape = V(g)$shape,
      vertex.size = V(g)$size,
      vertex.label.cex = V(g)$label.cex,
      vertex.label.dist = V(g)$label.dist,
      vertex.label.degree = V(g)$label.degree,
      edge.width = 0.3,
      main = paste("BC Road Events", year)) 
}
Figure 1
Figure 2
Figure 3
Figure 4
Figure 5
Figure 6
Figure 7
Figure 8
Figure 9
Figure 10
Figure 11
Figure 12
Figure 13
Figure 14
Figure 15
Figure 16
Figure 17
Figure 18